#Mecca.csv
import numpy as np
import pandas as pd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from pylab import rcParams
import warnings
import seaborn as sns
# Global plotting/runtime configuration for the notebook.
rcParams["figure.figsize"] = (30, 18)
warnings.filterwarnings("ignore")

import os

# List every file shipped with the rainfall dataset so the paths used
# below (Mecca.csv) can be verified by eye.
# NOTE: the loop body was un-indented in the original paste; re-indented here.
for dirname, _, filenames in os.walk('CSV/total-rain-fall-in-mm-observed-by-pme-met-station-2009/'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
CSV/total-rain-fall-in-mm-observed-by-pme-met-station-2009/Mecca.csv CSV/total-rain-fall-in-mm-observed-by-pme-met-station-2009/total-rain-fall-in-mm-observed-by-pme-met-station-2009.csv
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM, GRU
from keras.layers import Dropout
from sklearn.preprocessing import MinMaxScaler
from keras.models import load_model
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
import itertools
import random
import os
WARNING:tensorflow:From C:\Users\Mshika21\anaconda3\Lib\site-packages\keras\src\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.
# Load the monthly rainfall observations (Date, Value) for Mecca.
data = pd.read_csv(
    'CSV/total-rain-fall-in-mm-observed-by-pme-met-station-2009/Mecca.csv',
    header=0,
)

# Experiment knobs: fraction of windows used for training, and epoch budget.
train_percent = 0.60
num_epochs = 150

data
| Date | Value | |
|---|---|---|
| 0 | 1-Jan-01 | 10.2 |
| 1 | 1-Feb-01 | 3.1 |
| 2 | 1-Mar-01 | 6.1 |
| 3 | 1-Apr-01 | 7.1 |
| 4 | 1-May-01 | 3.4 |
| ... | ... | ... |
| 1447 | 1-Aug-21 | 0.1 |
| 1448 | 1-Sep-21 | 0.3 |
| 1449 | 1-Oct-21 | 2.4 |
| 1450 | 1-Nov-21 | 14.2 |
| 1451 | 1-Dec-21 | 24.8 |
1452 rows × 2 columns
# How many distinct rainfall values appear in the series.
symbols = data['Value'].unique()
print(len(symbols))
248
data['Value'].describe()
count 1452.000000 mean 6.188085 std 8.052472 min 0.000000 25% 0.200000 50% 3.000000 75% 9.600000 max 52.000000 Name: Value, dtype: float64
# Pick one distinct rainfall value at random (illustrative only — the pick
# is not used downstream).
# random.seed(42)  # NOTE: uncomment to make the choice reproducible
choosen_symbol = random.choice(symbols)
print(choosen_symbol)
35.5
# Keep only the numeric rainfall column; the Date column is not fed to the model.
data_mod = data.drop(columns=['Date'])
data_mod
| Value | |
|---|---|
| 0 | 10.2 |
| 1 | 3.1 |
| 2 | 6.1 |
| 3 | 7.1 |
| 4 | 3.4 |
| ... | ... |
| 1447 | 0.1 |
| 1448 | 0.3 |
| 1449 | 2.4 |
| 1450 | 14.2 |
| 1451 | 24.8 |
1452 rows × 1 columns
# Feature Scaling
# Map rainfall values into [0, 1]; the same fitted scaler (`sc`) can later
# invert predictions back to mm.
sc = MinMaxScaler(feature_range=(0, 1))
data_mod_scaled = sc.fit_transform(data_mod)
# Creating a data structure (it does not work when you have only one feature)
def create_data(df, n_future, n_past, train_test_split_percentage, validation_split_percentage):
    """Build sliding-window samples from a 2-D array and split them in time order.

    Each sample pairs the previous `n_past` rows (all features) with the value
    of feature 0 exactly `n_future` steps ahead.

    Parameters
    ----------
    df : 2-D array, shape (n_samples, n_features) — e.g. the scaled series.
    n_future : int — forecast horizon in steps.
    n_past : int — window length of historical steps per sample.
    train_test_split_percentage : float — fraction of windows kept before the test set.
    validation_split_percentage : float — fraction of the training part held out for validation.

    Returns
    -------
    (x_train, x_test, x_val, y_train, y_test, y_val) as numpy arrays; the
    x arrays have shape (samples, n_past, n_features), y arrays (samples, 1).
    """
    n_feature = df.shape[1]
    x_data, y_data = [], []
    for i in range(n_past, len(df) - n_future + 1):
        x_data.append(df[i - n_past:i, 0:n_feature])
        y_data.append(df[i + n_future - 1:i + n_future, 0])

    # Chronological split points: train | validation | test.
    split_training_test_starting_point = int(round(train_test_split_percentage * len(x_data)))
    split_train_validation_starting_point = int(round(split_training_test_starting_point * (1 - validation_split_percentage)))

    x_train = x_data[:split_train_validation_starting_point]
    y_train = y_data[:split_train_validation_starting_point]
    x_val = x_data[split_train_validation_starting_point:split_training_test_starting_point]
    # BUG FIX: validation targets were taken from x_data in the original,
    # pairing validation inputs with input windows instead of labels.
    y_val = y_data[split_train_validation_starting_point:split_training_test_starting_point]
    x_test = x_data[split_training_test_starting_point:]
    y_test = y_data[split_training_test_starting_point:]
    return np.array(x_train), np.array(x_test), np.array(x_val), np.array(y_train), np.array(y_test), np.array(y_val)
# Windowing: use the past 25 observations to predict 1 step into the future.
# No explicit validation slice here — Keras' validation_split is used at fit time.
X_train, X_test, X_val, y_train, y_test, y_val = create_data(
    data_mod_scaled,
    n_future=1,
    n_past=25,
    train_test_split_percentage=train_percent,
    validation_split_percentage=0,
)
# Sanity-check the shapes of the train/test splits.
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)
(856, 25, 1) (571, 25, 1) (856, 1) (571, 1)
# ------------------LSTM-----------------------
# Two stacked 16-unit LSTM layers with dropout, ending in a single linear
# output for one-step-ahead regression on the scaled series.
regressor = Sequential([
    LSTM(units=16, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.2),
    LSTM(units=16, return_sequences=False),
    Dropout(0.2),
    Dense(units=1, activation='linear'),
])
regressor.compile(optimizer='adam', loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError()])
regressor.summary()
WARNING:tensorflow:From C:\Users\Mshika21\anaconda3\Lib\site-packages\keras\src\backend.py:873: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.
WARNING:tensorflow:From C:\Users\Mshika21\anaconda3\Lib\site-packages\keras\src\optimizers\__init__.py:309: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm (LSTM) (None, 25, 16) 1152
dropout (Dropout) (None, 25, 16) 0
lstm_1 (LSTM) (None, 16) 2112
dropout_1 (Dropout) (None, 16) 0
dense (Dense) (None, 1) 17
=================================================================
Total params: 3281 (12.82 KB)
Trainable params: 3281 (12.82 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
# Optional callbacks, kept for reference — enable to stop on a val-loss
# plateau and/or checkpoint the best weights:
# es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)
# mc = ModelCheckpoint('best_model.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)

# Train on 70% of the training windows, validating on the last 30%.
history = regressor.fit(
    X_train,
    y_train,
    validation_split=0.3,
    epochs=num_epochs,
    batch_size=64,
)
# history = regressor.fit(X_train, y_train, validation_split=0.3, epochs=num_epochs, batch_size=64, callbacks=[es])
Epoch 1/150 WARNING:tensorflow:From C:\Users\Mshika21\anaconda3\Lib\site-packages\keras\src\utils\tf_utils.py:492: The name tf.ragged.RaggedTensorValue is deprecated. Please use tf.compat.v1.ragged.RaggedTensorValue instead. 10/10 [==============================] - 7s 162ms/step - loss: 0.0324 - root_mean_squared_error: 0.1801 - val_loss: 0.0233 - val_root_mean_squared_error: 0.1525 Epoch 2/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0254 - root_mean_squared_error: 0.1593 - val_loss: 0.0241 - val_root_mean_squared_error: 0.1553 Epoch 3/150 10/10 [==============================] - 0s 22ms/step - loss: 0.0249 - root_mean_squared_error: 0.1577 - val_loss: 0.0228 - val_root_mean_squared_error: 0.1510 Epoch 4/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0241 - root_mean_squared_error: 0.1551 - val_loss: 0.0224 - val_root_mean_squared_error: 0.1497 Epoch 5/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0240 - root_mean_squared_error: 0.1549 - val_loss: 0.0222 - val_root_mean_squared_error: 0.1491 Epoch 6/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0239 - root_mean_squared_error: 0.1545 - val_loss: 0.0222 - val_root_mean_squared_error: 0.1492 Epoch 7/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0230 - root_mean_squared_error: 0.1516 - val_loss: 0.0222 - val_root_mean_squared_error: 0.1491 Epoch 8/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0227 - root_mean_squared_error: 0.1508 - val_loss: 0.0217 - val_root_mean_squared_error: 0.1473 Epoch 9/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0225 - root_mean_squared_error: 0.1499 - val_loss: 0.0213 - val_root_mean_squared_error: 0.1459 Epoch 10/150 10/10 [==============================] - 0s 22ms/step - loss: 0.0219 - root_mean_squared_error: 0.1481 - val_loss: 0.0213 - val_root_mean_squared_error: 0.1460 Epoch 11/150 10/10 [==============================] - 0s 
20ms/step - loss: 0.0213 - root_mean_squared_error: 0.1458 - val_loss: 0.0203 - val_root_mean_squared_error: 0.1426 Epoch 12/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0206 - root_mean_squared_error: 0.1435 - val_loss: 0.0203 - val_root_mean_squared_error: 0.1426 Epoch 13/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0202 - root_mean_squared_error: 0.1421 - val_loss: 0.0188 - val_root_mean_squared_error: 0.1369 Epoch 14/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0195 - root_mean_squared_error: 0.1395 - val_loss: 0.0187 - val_root_mean_squared_error: 0.1368 Epoch 15/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0188 - root_mean_squared_error: 0.1373 - val_loss: 0.0173 - val_root_mean_squared_error: 0.1317 Epoch 16/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0181 - root_mean_squared_error: 0.1346 - val_loss: 0.0205 - val_root_mean_squared_error: 0.1431 Epoch 17/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0187 - root_mean_squared_error: 0.1369 - val_loss: 0.0169 - val_root_mean_squared_error: 0.1300 Epoch 18/150 10/10 [==============================] - 0s 19ms/step - loss: 0.0184 - root_mean_squared_error: 0.1355 - val_loss: 0.0160 - val_root_mean_squared_error: 0.1263 Epoch 19/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0171 - root_mean_squared_error: 0.1308 - val_loss: 0.0177 - val_root_mean_squared_error: 0.1331 Epoch 20/150 10/10 [==============================] - 0s 19ms/step - loss: 0.0173 - root_mean_squared_error: 0.1315 - val_loss: 0.0156 - val_root_mean_squared_error: 0.1250 Epoch 21/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0161 - root_mean_squared_error: 0.1268 - val_loss: 0.0171 - val_root_mean_squared_error: 0.1309 Epoch 22/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0159 - root_mean_squared_error: 0.1260 - val_loss: 0.0145 - 
val_root_mean_squared_error: 0.1205 Epoch 23/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0150 - root_mean_squared_error: 0.1224 - val_loss: 0.0150 - val_root_mean_squared_error: 0.1223 Epoch 24/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0148 - root_mean_squared_error: 0.1215 - val_loss: 0.0144 - val_root_mean_squared_error: 0.1199 Epoch 25/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0146 - root_mean_squared_error: 0.1206 - val_loss: 0.0131 - val_root_mean_squared_error: 0.1146 Epoch 26/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0141 - root_mean_squared_error: 0.1185 - val_loss: 0.0136 - val_root_mean_squared_error: 0.1168 Epoch 27/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0134 - root_mean_squared_error: 0.1158 - val_loss: 0.0135 - val_root_mean_squared_error: 0.1163 Epoch 28/150 10/10 [==============================] - 0s 22ms/step - loss: 0.0133 - root_mean_squared_error: 0.1151 - val_loss: 0.0134 - val_root_mean_squared_error: 0.1158 Epoch 29/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0125 - root_mean_squared_error: 0.1120 - val_loss: 0.0128 - val_root_mean_squared_error: 0.1132 Epoch 30/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0129 - root_mean_squared_error: 0.1134 - val_loss: 0.0126 - val_root_mean_squared_error: 0.1124 Epoch 31/150 10/10 [==============================] - 0s 23ms/step - loss: 0.0127 - root_mean_squared_error: 0.1126 - val_loss: 0.0125 - val_root_mean_squared_error: 0.1116 Epoch 32/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0126 - root_mean_squared_error: 0.1122 - val_loss: 0.0125 - val_root_mean_squared_error: 0.1118 Epoch 33/150 10/10 [==============================] - 0s 22ms/step - loss: 0.0129 - root_mean_squared_error: 0.1137 - val_loss: 0.0120 - val_root_mean_squared_error: 0.1096 Epoch 34/150 10/10 [==============================] - 0s 
20ms/step - loss: 0.0124 - root_mean_squared_error: 0.1112 - val_loss: 0.0129 - val_root_mean_squared_error: 0.1138 Epoch 35/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0121 - root_mean_squared_error: 0.1099 - val_loss: 0.0119 - val_root_mean_squared_error: 0.1089 Epoch 36/150 10/10 [==============================] - 0s 22ms/step - loss: 0.0121 - root_mean_squared_error: 0.1100 - val_loss: 0.0126 - val_root_mean_squared_error: 0.1123 Epoch 37/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0118 - root_mean_squared_error: 0.1087 - val_loss: 0.0121 - val_root_mean_squared_error: 0.1100 Epoch 38/150 10/10 [==============================] - 0s 19ms/step - loss: 0.0113 - root_mean_squared_error: 0.1062 - val_loss: 0.0116 - val_root_mean_squared_error: 0.1077 Epoch 39/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0121 - root_mean_squared_error: 0.1099 - val_loss: 0.0113 - val_root_mean_squared_error: 0.1063 Epoch 40/150 10/10 [==============================] - 0s 22ms/step - loss: 0.0114 - root_mean_squared_error: 0.1067 - val_loss: 0.0122 - val_root_mean_squared_error: 0.1102 Epoch 41/150 10/10 [==============================] - 0s 23ms/step - loss: 0.0109 - root_mean_squared_error: 0.1044 - val_loss: 0.0112 - val_root_mean_squared_error: 0.1058 Epoch 42/150 10/10 [==============================] - 0s 24ms/step - loss: 0.0116 - root_mean_squared_error: 0.1077 - val_loss: 0.0114 - val_root_mean_squared_error: 0.1069 Epoch 43/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0117 - root_mean_squared_error: 0.1080 - val_loss: 0.0109 - val_root_mean_squared_error: 0.1043 Epoch 44/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0107 - root_mean_squared_error: 0.1034 - val_loss: 0.0115 - val_root_mean_squared_error: 0.1071 Epoch 45/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0108 - root_mean_squared_error: 0.1041 - val_loss: 0.0109 - 
val_root_mean_squared_error: 0.1046 Epoch 46/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0114 - root_mean_squared_error: 0.1067 - val_loss: 0.0112 - val_root_mean_squared_error: 0.1057 Epoch 47/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0105 - root_mean_squared_error: 0.1027 - val_loss: 0.0105 - val_root_mean_squared_error: 0.1027 Epoch 48/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0106 - root_mean_squared_error: 0.1031 - val_loss: 0.0105 - val_root_mean_squared_error: 0.1024 Epoch 49/150 10/10 [==============================] - 0s 23ms/step - loss: 0.0106 - root_mean_squared_error: 0.1032 - val_loss: 0.0109 - val_root_mean_squared_error: 0.1044 Epoch 50/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0109 - root_mean_squared_error: 0.1045 - val_loss: 0.0103 - val_root_mean_squared_error: 0.1015 Epoch 51/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0104 - root_mean_squared_error: 0.1021 - val_loss: 0.0108 - val_root_mean_squared_error: 0.1040 Epoch 52/150 10/10 [==============================] - 0s 22ms/step - loss: 0.0106 - root_mean_squared_error: 0.1027 - val_loss: 0.0103 - val_root_mean_squared_error: 0.1015 Epoch 53/150 10/10 [==============================] - 0s 22ms/step - loss: 0.0109 - root_mean_squared_error: 0.1042 - val_loss: 0.0104 - val_root_mean_squared_error: 0.1020 Epoch 54/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0112 - root_mean_squared_error: 0.1059 - val_loss: 0.0106 - val_root_mean_squared_error: 0.1028 Epoch 55/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0099 - root_mean_squared_error: 0.0993 - val_loss: 0.0107 - val_root_mean_squared_error: 0.1035 Epoch 56/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0103 - root_mean_squared_error: 0.1016 - val_loss: 0.0104 - val_root_mean_squared_error: 0.1018 Epoch 57/150 10/10 [==============================] - 0s 
20ms/step - loss: 0.0097 - root_mean_squared_error: 0.0982 - val_loss: 0.0102 - val_root_mean_squared_error: 0.1010 Epoch 58/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0100 - root_mean_squared_error: 0.1002 - val_loss: 0.0105 - val_root_mean_squared_error: 0.1026 Epoch 59/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0103 - root_mean_squared_error: 0.1013 - val_loss: 0.0098 - val_root_mean_squared_error: 0.0988 Epoch 60/150 10/10 [==============================] - 0s 22ms/step - loss: 0.0101 - root_mean_squared_error: 0.1007 - val_loss: 0.0100 - val_root_mean_squared_error: 0.0998 Epoch 61/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0101 - root_mean_squared_error: 0.1003 - val_loss: 0.0103 - val_root_mean_squared_error: 0.1016 Epoch 62/150 10/10 [==============================] - 0s 22ms/step - loss: 0.0101 - root_mean_squared_error: 0.1005 - val_loss: 0.0104 - val_root_mean_squared_error: 0.1021 Epoch 63/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0098 - root_mean_squared_error: 0.0992 - val_loss: 0.0096 - val_root_mean_squared_error: 0.0980 Epoch 64/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0097 - root_mean_squared_error: 0.0986 - val_loss: 0.0098 - val_root_mean_squared_error: 0.0991 Epoch 65/150 10/10 [==============================] - 0s 22ms/step - loss: 0.0095 - root_mean_squared_error: 0.0973 - val_loss: 0.0098 - val_root_mean_squared_error: 0.0990 Epoch 66/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0096 - root_mean_squared_error: 0.0980 - val_loss: 0.0095 - val_root_mean_squared_error: 0.0974 Epoch 67/150 10/10 [==============================] - 0s 27ms/step - loss: 0.0095 - root_mean_squared_error: 0.0976 - val_loss: 0.0094 - val_root_mean_squared_error: 0.0967 Epoch 68/150 10/10 [==============================] - 0s 22ms/step - loss: 0.0101 - root_mean_squared_error: 0.1006 - val_loss: 0.0094 - 
val_root_mean_squared_error: 0.0968 Epoch 69/150 10/10 [==============================] - 0s 19ms/step - loss: 0.0098 - root_mean_squared_error: 0.0989 - val_loss: 0.0097 - val_root_mean_squared_error: 0.0987 Epoch 70/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0097 - root_mean_squared_error: 0.0983 - val_loss: 0.0095 - val_root_mean_squared_error: 0.0973 Epoch 71/150 10/10 [==============================] - 0s 19ms/step - loss: 0.0100 - root_mean_squared_error: 0.1000 - val_loss: 0.0091 - val_root_mean_squared_error: 0.0954 Epoch 72/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0089 - root_mean_squared_error: 0.0944 - val_loss: 0.0095 - val_root_mean_squared_error: 0.0975 Epoch 73/150 10/10 [==============================] - 0s 22ms/step - loss: 0.0093 - root_mean_squared_error: 0.0962 - val_loss: 0.0094 - val_root_mean_squared_error: 0.0970 Epoch 74/150 10/10 [==============================] - 0s 22ms/step - loss: 0.0097 - root_mean_squared_error: 0.0985 - val_loss: 0.0101 - val_root_mean_squared_error: 0.1004 Epoch 75/150 10/10 [==============================] - 0s 22ms/step - loss: 0.0101 - root_mean_squared_error: 0.1004 - val_loss: 0.0100 - val_root_mean_squared_error: 0.1001 Epoch 76/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0089 - root_mean_squared_error: 0.0945 - val_loss: 0.0094 - val_root_mean_squared_error: 0.0970 Epoch 77/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0093 - root_mean_squared_error: 0.0966 - val_loss: 0.0094 - val_root_mean_squared_error: 0.0967 Epoch 78/150 10/10 [==============================] - 0s 22ms/step - loss: 0.0092 - root_mean_squared_error: 0.0960 - val_loss: 0.0094 - val_root_mean_squared_error: 0.0970 Epoch 79/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0085 - root_mean_squared_error: 0.0919 - val_loss: 0.0090 - val_root_mean_squared_error: 0.0949 Epoch 80/150 10/10 [==============================] - 0s 
22ms/step - loss: 0.0090 - root_mean_squared_error: 0.0947 - val_loss: 0.0088 - val_root_mean_squared_error: 0.0939 Epoch 81/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0089 - root_mean_squared_error: 0.0941 - val_loss: 0.0089 - val_root_mean_squared_error: 0.0945 Epoch 82/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0094 - root_mean_squared_error: 0.0968 - val_loss: 0.0091 - val_root_mean_squared_error: 0.0955 Epoch 83/150 10/10 [==============================] - 0s 22ms/step - loss: 0.0085 - root_mean_squared_error: 0.0924 - val_loss: 0.0089 - val_root_mean_squared_error: 0.0945 Epoch 84/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0089 - root_mean_squared_error: 0.0943 - val_loss: 0.0096 - val_root_mean_squared_error: 0.0977 Epoch 85/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0091 - root_mean_squared_error: 0.0955 - val_loss: 0.0089 - val_root_mean_squared_error: 0.0943 Epoch 86/150 10/10 [==============================] - 0s 22ms/step - loss: 0.0085 - root_mean_squared_error: 0.0922 - val_loss: 0.0092 - val_root_mean_squared_error: 0.0961 Epoch 87/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0088 - root_mean_squared_error: 0.0936 - val_loss: 0.0085 - val_root_mean_squared_error: 0.0924 Epoch 88/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0088 - root_mean_squared_error: 0.0935 - val_loss: 0.0090 - val_root_mean_squared_error: 0.0946 Epoch 89/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0086 - root_mean_squared_error: 0.0928 - val_loss: 0.0089 - val_root_mean_squared_error: 0.0943 Epoch 90/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0084 - root_mean_squared_error: 0.0916 - val_loss: 0.0088 - val_root_mean_squared_error: 0.0936 Epoch 91/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0090 - root_mean_squared_error: 0.0946 - val_loss: 0.0089 - 
val_root_mean_squared_error: 0.0946 Epoch 92/150 10/10 [==============================] - 0s 22ms/step - loss: 0.0083 - root_mean_squared_error: 0.0912 - val_loss: 0.0089 - val_root_mean_squared_error: 0.0945 Epoch 93/150 10/10 [==============================] - 0s 19ms/step - loss: 0.0084 - root_mean_squared_error: 0.0916 - val_loss: 0.0085 - val_root_mean_squared_error: 0.0923 Epoch 94/150 10/10 [==============================] - 0s 22ms/step - loss: 0.0083 - root_mean_squared_error: 0.0910 - val_loss: 0.0095 - val_root_mean_squared_error: 0.0976 Epoch 95/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0084 - root_mean_squared_error: 0.0919 - val_loss: 0.0084 - val_root_mean_squared_error: 0.0919 Epoch 96/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0082 - root_mean_squared_error: 0.0907 - val_loss: 0.0086 - val_root_mean_squared_error: 0.0927 Epoch 97/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0087 - root_mean_squared_error: 0.0933 - val_loss: 0.0090 - val_root_mean_squared_error: 0.0947 Epoch 98/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0083 - root_mean_squared_error: 0.0910 - val_loss: 0.0083 - val_root_mean_squared_error: 0.0911 Epoch 99/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0082 - root_mean_squared_error: 0.0903 - val_loss: 0.0086 - val_root_mean_squared_error: 0.0927 Epoch 100/150 10/10 [==============================] - 0s 19ms/step - loss: 0.0084 - root_mean_squared_error: 0.0918 - val_loss: 0.0090 - val_root_mean_squared_error: 0.0950 Epoch 101/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0083 - root_mean_squared_error: 0.0912 - val_loss: 0.0089 - val_root_mean_squared_error: 0.0941 Epoch 102/150 10/10 [==============================] - 0s 19ms/step - loss: 0.0088 - root_mean_squared_error: 0.0940 - val_loss: 0.0085 - val_root_mean_squared_error: 0.0920 Epoch 103/150 10/10 [==============================] - 0s 
20ms/step - loss: 0.0081 - root_mean_squared_error: 0.0901 - val_loss: 0.0087 - val_root_mean_squared_error: 0.0933 Epoch 104/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0083 - root_mean_squared_error: 0.0914 - val_loss: 0.0085 - val_root_mean_squared_error: 0.0922 Epoch 105/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0085 - root_mean_squared_error: 0.0919 - val_loss: 0.0083 - val_root_mean_squared_error: 0.0909 Epoch 106/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0084 - root_mean_squared_error: 0.0915 - val_loss: 0.0085 - val_root_mean_squared_error: 0.0923 Epoch 107/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0084 - root_mean_squared_error: 0.0916 - val_loss: 0.0086 - val_root_mean_squared_error: 0.0928 Epoch 108/150 10/10 [==============================] - 0s 18ms/step - loss: 0.0082 - root_mean_squared_error: 0.0905 - val_loss: 0.0085 - val_root_mean_squared_error: 0.0922 Epoch 109/150 10/10 [==============================] - 0s 19ms/step - loss: 0.0088 - root_mean_squared_error: 0.0940 - val_loss: 0.0087 - val_root_mean_squared_error: 0.0935 Epoch 110/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0085 - root_mean_squared_error: 0.0920 - val_loss: 0.0088 - val_root_mean_squared_error: 0.0939 Epoch 111/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0083 - root_mean_squared_error: 0.0914 - val_loss: 0.0088 - val_root_mean_squared_error: 0.0939 Epoch 112/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0087 - root_mean_squared_error: 0.0933 - val_loss: 0.0085 - val_root_mean_squared_error: 0.0922 Epoch 113/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0085 - root_mean_squared_error: 0.0922 - val_loss: 0.0085 - val_root_mean_squared_error: 0.0924 Epoch 114/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0085 - root_mean_squared_error: 0.0920 - val_loss: 0.0086 - 
val_root_mean_squared_error: 0.0928 Epoch 115/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0084 - root_mean_squared_error: 0.0918 - val_loss: 0.0087 - val_root_mean_squared_error: 0.0934 Epoch 116/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0081 - root_mean_squared_error: 0.0898 - val_loss: 0.0086 - val_root_mean_squared_error: 0.0930 Epoch 117/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0084 - root_mean_squared_error: 0.0915 - val_loss: 0.0091 - val_root_mean_squared_error: 0.0956 Epoch 118/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0087 - root_mean_squared_error: 0.0931 - val_loss: 0.0089 - val_root_mean_squared_error: 0.0942 Epoch 119/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0084 - root_mean_squared_error: 0.0914 - val_loss: 0.0092 - val_root_mean_squared_error: 0.0960 Epoch 120/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0083 - root_mean_squared_error: 0.0912 - val_loss: 0.0085 - val_root_mean_squared_error: 0.0923 Epoch 121/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0085 - root_mean_squared_error: 0.0922 - val_loss: 0.0086 - val_root_mean_squared_error: 0.0926 Epoch 122/150 10/10 [==============================] - 0s 23ms/step - loss: 0.0083 - root_mean_squared_error: 0.0913 - val_loss: 0.0083 - val_root_mean_squared_error: 0.0912 Epoch 123/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0079 - root_mean_squared_error: 0.0888 - val_loss: 0.0083 - val_root_mean_squared_error: 0.0910 Epoch 124/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0085 - root_mean_squared_error: 0.0920 - val_loss: 0.0082 - val_root_mean_squared_error: 0.0906 Epoch 125/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0082 - root_mean_squared_error: 0.0903 - val_loss: 0.0087 - val_root_mean_squared_error: 0.0935 Epoch 126/150 10/10 
[==============================] - 0s 21ms/step - loss: 0.0082 - root_mean_squared_error: 0.0904 - val_loss: 0.0083 - val_root_mean_squared_error: 0.0913 Epoch 127/150 10/10 [==============================] - 0s 19ms/step - loss: 0.0084 - root_mean_squared_error: 0.0915 - val_loss: 0.0084 - val_root_mean_squared_error: 0.0914 Epoch 128/150 10/10 [==============================] - 0s 22ms/step - loss: 0.0080 - root_mean_squared_error: 0.0896 - val_loss: 0.0088 - val_root_mean_squared_error: 0.0940 Epoch 129/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0079 - root_mean_squared_error: 0.0890 - val_loss: 0.0088 - val_root_mean_squared_error: 0.0939 Epoch 130/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0081 - root_mean_squared_error: 0.0898 - val_loss: 0.0087 - val_root_mean_squared_error: 0.0931 Epoch 131/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0082 - root_mean_squared_error: 0.0905 - val_loss: 0.0092 - val_root_mean_squared_error: 0.0959 Epoch 132/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0085 - root_mean_squared_error: 0.0920 - val_loss: 0.0087 - val_root_mean_squared_error: 0.0935 Epoch 133/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0083 - root_mean_squared_error: 0.0912 - val_loss: 0.0088 - val_root_mean_squared_error: 0.0938 Epoch 134/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0079 - root_mean_squared_error: 0.0889 - val_loss: 0.0088 - val_root_mean_squared_error: 0.0938 Epoch 135/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0085 - root_mean_squared_error: 0.0924 - val_loss: 0.0088 - val_root_mean_squared_error: 0.0939 Epoch 136/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0079 - root_mean_squared_error: 0.0886 - val_loss: 0.0089 - val_root_mean_squared_error: 0.0945 Epoch 137/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0080 - 
root_mean_squared_error: 0.0892 - val_loss: 0.0090 - val_root_mean_squared_error: 0.0950 Epoch 138/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0078 - root_mean_squared_error: 0.0881 - val_loss: 0.0087 - val_root_mean_squared_error: 0.0933 Epoch 139/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0083 - root_mean_squared_error: 0.0910 - val_loss: 0.0092 - val_root_mean_squared_error: 0.0961 Epoch 140/150 10/10 [==============================] - 0s 19ms/step - loss: 0.0082 - root_mean_squared_error: 0.0907 - val_loss: 0.0090 - val_root_mean_squared_error: 0.0950 Epoch 141/150 10/10 [==============================] - 0s 19ms/step - loss: 0.0079 - root_mean_squared_error: 0.0890 - val_loss: 0.0089 - val_root_mean_squared_error: 0.0944 Epoch 142/150 10/10 [==============================] - 0s 19ms/step - loss: 0.0083 - root_mean_squared_error: 0.0912 - val_loss: 0.0092 - val_root_mean_squared_error: 0.0959 Epoch 143/150 10/10 [==============================] - 0s 19ms/step - loss: 0.0083 - root_mean_squared_error: 0.0911 - val_loss: 0.0086 - val_root_mean_squared_error: 0.0929 Epoch 144/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0082 - root_mean_squared_error: 0.0905 - val_loss: 0.0089 - val_root_mean_squared_error: 0.0942 Epoch 145/150 10/10 [==============================] - 0s 21ms/step - loss: 0.0082 - root_mean_squared_error: 0.0905 - val_loss: 0.0096 - val_root_mean_squared_error: 0.0980 Epoch 146/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0080 - root_mean_squared_error: 0.0892 - val_loss: 0.0092 - val_root_mean_squared_error: 0.0958 Epoch 147/150 10/10 [==============================] - 0s 19ms/step - loss: 0.0083 - root_mean_squared_error: 0.0913 - val_loss: 0.0088 - val_root_mean_squared_error: 0.0939 Epoch 148/150 10/10 [==============================] - 0s 19ms/step - loss: 0.0085 - root_mean_squared_error: 0.0920 - val_loss: 0.0093 - val_root_mean_squared_error: 
0.0965 Epoch 149/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0079 - root_mean_squared_error: 0.0887 - val_loss: 0.0087 - val_root_mean_squared_error: 0.0934 Epoch 150/150 10/10 [==============================] - 0s 20ms/step - loss: 0.0083 - root_mean_squared_error: 0.0912 - val_loss: 0.0090 - val_root_mean_squared_error: 0.0948
history.history.keys()
dict_keys(['loss', 'root_mean_squared_error', 'val_loss', 'val_root_mean_squared_error'])
# Plot training curves side by side: RMSE (left) and loss (right).
fig = plt.figure(figsize=(20, 7))

# Left panel: RMSE per epoch.
ax = fig.add_subplot(1, 2, 1)
ax.plot(history.epoch, history.history['root_mean_squared_error'], label="rmse")
ax.plot(history.epoch, history.history['val_root_mean_squared_error'], label="val_rmse")
ax.set_title("RMSE", fontsize=18)
ax.set_xlabel("Epochs", fontsize=15)
ax.set_ylabel("RMSE", fontsize=15)
ax.grid(alpha=0.3)
ax.legend()

# Right panel: loss (MSE) per epoch.
ax = fig.add_subplot(1, 2, 2)
ax.plot(history.epoch, history.history['loss'], label="loss")
ax.plot(history.epoch, history.history['val_loss'], label="val_loss")
ax.set_title("Loss", fontsize=18)
ax.set_xlabel("Epochs", fontsize=15)
ax.set_ylabel("Loss", fontsize=15)
ax.grid(alpha=0.3)
ax.legend()

plt.show()
results = regressor.evaluate(X_test, y_test)
print("test loss, test acc:", np.round(results, 4))
18/18 [==============================] - 0s 6ms/step - loss: 0.0140 - root_mean_squared_error: 0.1182 test loss, test acc: [0.014 0.1182]
predictions = regressor.predict(X_test)
18/18 [==============================] - 1s 5ms/step
len(predictions)
571
from sklearn.metrics import r2_score
r2 = r2_score(y_test, predictions)
print("R-squared (R2) Score:", r2)
R-squared (R2) Score: 0.4263909117557261
LSTM_=pd.DataFrame(y_test , columns=["Test"])
LSTM_["LSTM"]=predictions
LSTM_
| Test | LSTM | |
|---|---|---|
| 0 | 0.000000 | 0.070894 |
| 1 | 0.001923 | 0.038783 |
| 2 | 0.001923 | 0.004139 |
| 3 | 0.003846 | -0.015221 |
| 4 | 0.046154 | 0.025185 |
| ... | ... | ... |
| 566 | 0.001923 | -0.001025 |
| 567 | 0.005769 | 0.006319 |
| 568 | 0.046154 | 0.138052 |
| 569 | 0.273077 | 0.430540 |
| 570 | 0.476923 | 0.304305 |
571 rows × 2 columns
pd.DataFrame(data_mod_scaled)[:571]
| 0 | |
|---|---|
| 0 | 0.196154 |
| 1 | 0.059615 |
| 2 | 0.117308 |
| 3 | 0.136538 |
| 4 | 0.065385 |
| ... | ... |
| 566 | 0.076923 |
| 567 | 0.326923 |
| 568 | 0.057692 |
| 569 | 0.000000 |
| 570 | 0.001923 |
571 rows × 1 columns
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)
(856, 25, 1) (571, 25, 1) (856, 1) (571, 1)
# Fit ARIMA model
# NOTE(review): `train_data`/`test_data` are defined in an earlier cell —
# confirm they hold the scaled monthly series before re-running.
order = (5, 1, 0) # Example order parameters (p, d, q)
arima_model = sm.tsa.ARIMA(train_data, order=order)
arima_result = arima_model.fit()
# Seasonal terms (P, D, Q, s) with a 12-month period for the SARIMA variant.
seasonal_order = (2, 1, 1, 12)
sarima_model = sm.tsa.statespace.SARIMAX(train_data, order=order, seasonal_order=seasonal_order)
sarima_result = sarima_model.fit()
# Static multi-step forecasts from the end of the training sample,
# spanning the whole test horizon.
arima_forecast = arima_result.forecast(steps=len(test_data))
sarima_forecast = sarima_result.forecast(steps=len(test_data))
# Visual check: training data, test data and the SARIMA forecast only
# (the ARIMA forecast is evaluated numerically in a later cell).
plt.figure(figsize=(12, 6))
plt.plot(train_data.index, train_data, label='Training data')
plt.plot(test_data.index, test_data, label='Test data')
plt.plot(test_data.index, sarima_forecast, label='SARIMA forecast')
plt.xlabel('Date')
plt.ylabel('Value')
plt.title(' SARIMA Forecasts')
plt.legend()
plt.show()
test_data
| 0 | |
|---|---|
| 1016 | 0.003846 |
| 1017 | 0.071154 |
| 1018 | 0.580769 |
| 1019 | 0.407692 |
| 1020 | 0.592308 |
| ... | ... |
| 1447 | 0.001923 |
| 1448 | 0.005769 |
| 1449 | 0.046154 |
| 1450 | 0.273077 |
| 1451 | 0.476923 |
436 rows × 1 columns
LSTM_
| Test | LSTM | |
|---|---|---|
| 0 | 0.000000 | 0.070894 |
| 1 | 0.001923 | 0.038783 |
| 2 | 0.001923 | 0.004139 |
| 3 | 0.003846 | -0.015221 |
| 4 | 0.046154 | 0.025185 |
| ... | ... | ... |
| 566 | 0.001923 | -0.001025 |
| 567 | 0.005769 | 0.006319 |
| 568 | 0.046154 | 0.138052 |
| 569 | 0.273077 | 0.430540 |
| 570 | 0.476923 | 0.304305 |
571 rows × 2 columns
# Score the ARIMA and SARIMA forecasts against the held-out test split
# with MAE / MSE / RMSE / R².
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# Re-generate both forecasts over the full test horizon.
arima_forecast = arima_result.forecast(steps=len(test_data))
sarima_forecast = sarima_result.forecast(steps=len(test_data))
arima_mae = mean_absolute_error(test_data, arima_forecast)
arima_mse = mean_squared_error(test_data, arima_forecast)
arima_rmse = np.sqrt(arima_mse)
arima_r2 = r2_score(test_data, arima_forecast)
sarima_mae = mean_absolute_error(test_data, sarima_forecast)
sarima_mse = mean_squared_error(test_data, sarima_forecast)
sarima_rmse = np.sqrt(sarima_mse)
sarima_r2 = r2_score(test_data, sarima_forecast)
# Side-by-side report; per the run below, SARIMA clearly outperforms ARIMA.
print("ARIMA Model:")
print("MAE:", arima_mae)
print("MSE:", arima_mse)
print("RMSE:", arima_rmse)
print("R-squared (R2) Score:", arima_r2)
print("\nSARIMA Model:")
print("MAE:", sarima_mae)
print("MSE:", sarima_mse)
print("RMSE:", sarima_rmse)
print("R-squared (R2) Score:", sarima_r2)
ARIMA Model: MAE: 0.10894611746410407 MSE: 0.03377255018414353 RMSE: 0.18377309428788408 R-squared (R2) Score: -0.27359553993274344 SARIMA Model: MAE: 0.058482050507392726 MSE: 0.012407223176341142 RMSE: 0.11138771555401045 R-squared (R2) Score: 0.5321116109331506
# Package the SARIMA forecast as a single-column DataFrame for later joins.
# Fixes two defects in the original cell: (1) the first line built a
# DataFrame that the next line immediately overwrote; (2) assigning
# columns=[["sarima_forecast"]] (a nested list) silently created a
# MultiIndex header instead of a flat column name, so later column
# selections returned a sub-DataFrame rather than a Series.
ForeCating = sarima_forecast.reset_index(drop=True).to_frame(name="sarima_forecast")
ForeCating
| sarima_forecast | |
|---|---|
| 0 | 0.009516 |
| 1 | 0.078747 |
| 2 | 0.379815 |
| 3 | 0.262313 |
| 4 | 0.229006 |
| ... | ... |
| 431 | 0.018702 |
| 432 | 0.019970 |
| 433 | 0.085060 |
| 434 | 0.388992 |
| 435 | 0.270370 |
436 rows × 1 columns
test
| test | ARIMA | |
|---|---|---|
| Date | ||
| 1985-09-01 | 0.003846 | 0.100064 |
| 1985-10-01 | 0.071154 | 0.116146 |
| 1985-11-01 | 0.580769 | 0.156372 |
| 1985-12-01 | 0.407692 | 0.364222 |
| 1986-01-01 | 0.592308 | 0.205666 |
| ... | ... | ... |
| 2021-08-01 | 0.001923 | 0.041139 |
| 2021-09-01 | 0.005769 | 0.107671 |
| 2021-10-01 | 0.046154 | 0.042625 |
| 2021-11-01 | 0.273077 | 0.122090 |
| 2021-12-01 | 0.476923 | 0.136350 |
436 rows × 2 columns
from scipy.stats import kruskal
def seasonality_test(series):
    """Heuristic monthly-seasonality check via a Kruskal-Wallis test.

    Returns True when the test's p-value is <= 0.05, else False.

    Bug fix: the original initialised a misspelled variable (``seasoanl``),
    so any non-significant result raised NameError instead of returning
    False.

    NOTE(review): kruskal(series, idx) compares the series *values* against
    the raw month indices 0..11, not the per-month groups of the series; a
    conventional seasonality test would be
    kruskal(*[series[idx == m] for m in range(12)]). Kept as-is to preserve
    the notebook's recorded results — confirm intent before changing.
    """
    seasonal = False
    idx = np.arange(len(series.index)) % 12
    H_statistic, p_value = kruskal(series, idx)
    if p_value <= 0.05:
        seasonal = True
    return seasonal
S_test=seasonality_test(Data[Fore])
Data[Fore].plot()
<Axes: xlabel='Date'>
# Parse the date strings (e.g. "1-Jan-01") into proper Timestamps.
data['Date'] = pd.to_datetime(data['Date'])
# NOTE(review): the two copies below swap data/Data through identical
# copies — the round-trip is a no-op and looks like notebook residue.
Data=data.copy()
data=Data.copy()
data
| Value | |
|---|---|
| Date | |
| 2001-01-01 | 10.2 |
| 2001-02-01 | 3.1 |
| 2001-03-01 | 6.1 |
| 2001-04-01 | 7.1 |
| 2001-05-01 | 3.4 |
| ... | ... |
| 2021-08-01 | 0.1 |
| 2021-09-01 | 0.3 |
| 2021-10-01 | 2.4 |
| 2021-11-01 | 14.2 |
| 2021-12-01 | 24.8 |
1452 rows × 1 columns
data.index
DatetimeIndex(['2001-01-01', '2001-02-01', '2001-03-01', '2001-04-01',
'2001-05-01', '2001-06-01', '2001-07-01', '2001-08-01',
'2001-09-01', '2001-10-01',
...
'2021-03-01', '2021-04-01', '2021-05-01', '2021-06-01',
'2021-07-01', '2021-08-01', '2021-09-01', '2021-10-01',
'2021-11-01', '2021-12-01'],
dtype='datetime64[ns]', name='Date', length=1452, freq=None)
type(data.index)
pandas.core.indexes.datetimes.DatetimeIndex
# Fresh working copy for the interactive train/test split below.
Data=data.copy()
print(Data.shape)
# NOTE(review): interactive, unvalidated split point — a value outside
# [0, len(Data)] silently yields an empty Train or Test slice, and a
# non-integer string raises ValueError. Consider a fixed ratio instead.
Train_nu= int(input("The Num u That wanna Train "))
Train=Data[:Train_nu]
Test=Data[Train_nu:]
(1452, 1) The Num u That wanna Train 440
type(Train.index)
pandas.core.indexes.datetimes.DatetimeIndex
from scipy.stats import kruskal
def seasonality_test(series):
    """Heuristic monthly-seasonality check via a Kruskal-Wallis test.

    Returns True when the test's p-value is <= 0.05, else False.

    Bug fix: the original initialised a misspelled variable (``seasoanl``),
    so any non-significant result raised NameError instead of returning
    False.

    NOTE(review): kruskal(series, idx) compares the series *values* against
    the raw month indices 0..11, not the per-month groups of the series; a
    conventional seasonality test would be
    kruskal(*[series[idx == m] for m in range(12)]). Kept as-is to preserve
    the notebook's recorded results — confirm intent before changing.
    """
    seasonal = False
    idx = np.arange(len(series.index)) % 12
    H_statistic, p_value = kruskal(series, idx)
    if p_value <= 0.05:
        seasonal = True
    return seasonal
S_test=seasonality_test(Data[Fore])
Fore="Value"
Data[Fore].plot()
<Axes: xlabel='Date'>
"""plt.plot(data.index, data['Value'], label='Value')
plt.title('Time Series Plot')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.show()"""
"plt.plot(data.index, data['Value'], label='Value')\nplt.title('Time Series Plot')\nplt.xlabel('Date')\nplt.ylabel('Value')\nplt.legend()\nplt.show()"
"""from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
def test_stationarity(timeseries):
rolmean = timeseries.rolling(window=12).mean()
rolstd = timeseries.rolling(window=12).std()
# Plot rolling statistics:
plt.figure(figsize=(10, 6))
orig = plt.plot(timeseries, color='blue', label='Original')
mean = plt.plot(rolmean, color='red', label='Rolling Mean')
std = plt.plot(rolstd, color='black', label='Rolling Std')
plt.title('Rolling Statistics')
plt.legend(loc='best')
plt.show()
# Perform Dickey-Fuller test:
print('Results of Dickey-Fuller Test:')
dftest = adfuller(timeseries, autolag='AIC')
dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'])
for key, value in dftest[4].items():
dfoutput['Critical Value (%s)' % key] = value
print(dfoutput)
test_stationarity(data['Value'])"""
"from statsmodels.tsa.seasonal import seasonal_decompose\nfrom statsmodels.tsa.stattools import adfuller\ndef test_stationarity(timeseries):\n rolmean = timeseries.rolling(window=12).mean()\n rolstd = timeseries.rolling(window=12).std()\n\n # Plot rolling statistics:\n plt.figure(figsize=(10, 6))\n orig = plt.plot(timeseries, color='blue', label='Original')\n mean = plt.plot(rolmean, color='red', label='Rolling Mean')\n std = plt.plot(rolstd, color='black', label='Rolling Std')\n plt.title('Rolling Statistics')\n plt.legend(loc='best')\n plt.show()\n\n # Perform Dickey-Fuller test:\n print('Results of Dickey-Fuller Test:')\n dftest = adfuller(timeseries, autolag='AIC')\n dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'])\n for key, value in dftest[4].items():\n dfoutput['Critical Value (%s)' % key] = value\n print(dfoutput)\n\ntest_stationarity(data['Value'])"
"""decomposition = seasonal_decompose(data['Value'], model='additive', period=12)
trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid"""
"decomposition = seasonal_decompose(data['Value'], model='additive', period=12)\ntrend = decomposition.trend\nseasonal = decomposition.seasonal\nresidual = decomposition.resid"
"""plt.figure(figsize=(10, 8))
plt.subplot(411)
plt.plot(data['Value'], label='Original')
plt.legend(loc='best')
plt.subplot(412)
plt.plot(trend, label='Trend')
plt.legend(loc='best')
plt.subplot(413)
plt.plot(seasonal, label='Seasonality')
plt.legend(loc='best')
plt.subplot(414)
plt.plot(residual, label='Residuals')
plt.legend(loc='best')
plt.tight_layout()"""
"plt.figure(figsize=(10, 8))\nplt.subplot(411)\nplt.plot(data['Value'], label='Original')\nplt.legend(loc='best')\nplt.subplot(412)\nplt.plot(trend, label='Trend')\nplt.legend(loc='best')\nplt.subplot(413)\nplt.plot(seasonal, label='Seasonality')\nplt.legend(loc='best')\nplt.subplot(414)\nplt.plot(residual, label='Residuals')\nplt.legend(loc='best')\nplt.tight_layout()"
"""plt.figure(figsize=(12, 6))
plt.subplot(211)
plt.plot(seasonal)
plt.title('Seasonal Component')
plt.subplot(212)
pd.plotting.autocorrelation_plot(data['Value'])
plt.title('Autocorrelation Plot')
plt.tight_layout()
plt.show()"""
"plt.figure(figsize=(12, 6))\nplt.subplot(211)\nplt.plot(seasonal)\nplt.title('Seasonal Component')\nplt.subplot(212)\npd.plotting.autocorrelation_plot(data['Value'])\nplt.title('Autocorrelation Plot')\nplt.tight_layout()\nplt.show()"
from pmdarima import auto_arima
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from sklearn.preprocessing import StandardScaler , MinMaxScaler
import statsmodels.api as sm
import seaborn as sns
import warnings
from pylab import rcParams
from statsmodels.tsa.seasonal import seasonal_decompose
from pandas.plotting import lag_plot
from pandas.plotting import autocorrelation_plot
from statsmodels.graphics.tsaplots import month_plot , quarter_plot
from scipy.stats import kruskal
Data
| Value | |
|---|---|
| Date | |
| 2001-01-01 | 10.2 |
| 2001-02-01 | 3.1 |
| 2001-03-01 | 6.1 |
| 2001-04-01 | 7.1 |
| 2001-05-01 | 3.4 |
| ... | ... |
| 2021-08-01 | 0.1 |
| 2021-09-01 | 0.3 |
| 2021-10-01 | 2.4 |
| 2021-11-01 | 14.2 |
| 2021-12-01 | 24.8 |
1452 rows × 1 columns
Data=data_daily[data_daily.columns.tolist()].resample("M").mean()
Data
| Value | |
|---|---|
| Date | |
| 2001-01-01 | 10.2 |
| 2001-02-01 | 3.1 |
| 2001-03-01 | 6.1 |
| 2001-04-01 | 7.1 |
| 2001-05-01 | 3.4 |
| ... | ... |
| 2021-08-01 | 0.1 |
| 2021-09-01 | 0.3 |
| 2021-10-01 | 2.4 |
| 2021-11-01 | 14.2 |
| 2021-12-01 | 24.8 |
1452 rows × 1 columns
plot=seasonal_decompose(data , model="additive" , period=30)
plot.plot();
Data=data.copy()
plot=seasonal_decompose(Data , model="additive")
plot.plot();
from sklearn.preprocessing import MinMaxScaler
def scale_numeric_columns(data):
    """Min-max scale every non-object column of a DataFrame (or a Series).

    Parameters
    ----------
    data : pd.DataFrame or pd.Series
        Input data; object-dtype columns are passed through untouched.

    Returns
    -------
    pd.DataFrame with the numeric columns rescaled to [0, 1].

    Bug fix: the original took ``data.copy()`` *before* converting a Series
    input to a frame, so for Series input the column-wise writes targeted a
    stale Series copy and the function misbehaved. DataFrame behaviour is
    unchanged.

    NOTE(review): the fitted scalers are collected but not returned, so the
    caller cannot inverse-transform later — confirm whether they are needed.
    """
    # Normalise Series input to a one-column frame BEFORE copying.
    if isinstance(data, pd.Series):
        data = data.to_frame()
    data_scaled = data.copy()
    scalers = {}
    for c in data.columns:
        if data[c].dtype != 'object':
            scalers[c] = MinMaxScaler()
            data_scaled[c] = scalers[c].fit_transform(data[[c]])
    return data_scaled
Data_scaled = scale_numeric_columns(Data)
Data =pd.DataFrame( Data_scaled["Value"] , columns=["Value"])
Data
| Value | |
|---|---|
| Date | |
| 2001-01-01 | 0.196154 |
| 2001-02-01 | 0.059615 |
| 2001-03-01 | 0.117308 |
| 2001-04-01 | 0.136538 |
| 2001-05-01 | 0.065385 |
| ... | ... |
| 2021-08-01 | 0.001923 |
| 2021-09-01 | 0.005769 |
| 2021-10-01 | 0.046154 |
| 2021-11-01 | 0.273077 |
| 2021-12-01 | 0.476923 |
1452 rows × 1 columns
plot_acf(Data, lags=10);
plot_pacf(Data , lags=5) ;
Data
| Value | |
|---|---|
| Date | |
| 2001-01-01 | 0.196154 |
| 2001-02-01 | 0.059615 |
| 2001-03-01 | 0.117308 |
| 2001-04-01 | 0.136538 |
| 2001-05-01 | 0.065385 |
| ... | ... |
| 2021-08-01 | 0.001923 |
| 2021-09-01 | 0.005769 |
| 2021-10-01 | 0.046154 |
| 2021-11-01 | 0.273077 |
| 2021-12-01 | 0.476923 |
1452 rows × 1 columns
axes = Data.plot(marker='.', alpha=0.8,
figsize=(30,25))
lag_plot(Data)
<Axes: xlabel='y(t)', ylabel='y(t + 1)'>
autocorrelation_plot(Data)
<Axes: xlabel='Lag', ylabel='Autocorrelation'>
from statsmodels.tsa.stattools import adfuller
def adf_test(series, title=''):
    """Run the Augmented Dickey-Fuller stationarity test and print a report.

    Parameters
    ----------
    series : pd.Series
        The time series to test; NaNs are dropped before testing.
    title : str
        Optional label echoed in the heading.

    Prints the test statistic, p-value, lag count, observation count and
    critical values, then a plain-language verdict at the 5% level.
    """
    print(f'Augmented Dickey-fuller Test: {title}')
    result = adfuller(series.dropna(), autolag="AIC")
    labels = ["ADF Test Statistic ", "P-Value ", "Lags Used ", "Observations"]
    out = pd.Series(result[0:4], index=labels)
    for key, val in result[4].items():
        out['Critical Value %s' % key] = val
    # Bug fix: to_string is a method — the original printed the bound-method
    # repr (visible in the cell output) instead of the formatted table.
    print(out.to_string())
    if result[1] <= 0.05:
        print("Strong evidence against the null hypothesis")
        print("Reject the null hypothesis")
        print("Data has no unit root; it is stationary")
    else:
        print("Weak evidence against the null hypothesis")
        print("Fail to reject the null hypothesis")
        print("Data has a unit root; it is non-stationary")
adf_test(Data)
Augmented Dickey-fuller Test: <bound method Series.to_string of ADF Test Statistic -5.659423e+00 P-Value 9.454858e-07 Lags Used 2.300000e+01 Observations 1.428000e+03 Critical Value 1% -3.434938e+00 Critical Value 5% -2.863566e+00 Critical Value 10% -2.567849e+00 dtype: float64> Strong Evdiance Against The Null Hypohtesis Reject The Null Hypohtesis Data Has No Unit Root Its Stationary
data_daily = Data
data_daily
| Value | |
|---|---|
| Date | |
| 2001-01-01 | 0.196154 |
| 2001-02-01 | 0.059615 |
| 2001-03-01 | 0.117308 |
| 2001-04-01 | 0.136538 |
| 2001-05-01 | 0.065385 |
| ... | ... |
| 2021-08-01 | 0.001923 |
| 2021-09-01 | 0.005769 |
| 2021-10-01 | 0.046154 |
| 2021-11-01 | 0.273077 |
| 2021-12-01 | 0.476923 |
1452 rows × 1 columns
baseline = data_daily.rolling(window=10).mean()
plt.figure(figsize=(15,3))
plt.plot(data_daily, c='blue',label='Data ')
plt.plot(baseline, c='red', label='Rolling mean')
plt.legend(fontsize=12)
plt.ylabel('')
plt.margins(x=0)
plt.grid()
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import r2_score
# Persistence ("naive") baseline: predict each value with the previous
# observation, then score it on the last 30% of the series.
baseline = data_daily.shift(1)
baseline = baseline.dropna()
data_actual = data_daily.iloc[1:]
size = int(len(data_actual)*0.7)
train = data_daily[:size]
test = data_actual[size:]
# Align the baseline predictions with the test window by date.
baseline_test = baseline.loc[test.index[0]:]
#Plot
plt.plot(test, c='blue',label='Data - Value ')
plt.plot(baseline_test, c='red', label='Persistence Algorithm')
plt.legend(fontsize=12)
plt.ylabel('kW')
plt.margins(x=0)
plt.title('Test part of the data'), plt.grid(), plt.xticks(rotation=45);
# calcolo errore
print('RMSE: %.3f' % np.sqrt(mean_squared_error(test, baseline_test)))
MAE = mean_absolute_error(test, baseline_test)
# NOTE(review): the series contains exact zeros, so the division below
# produces inf for MAPE (confirmed by the cell output "MAPE: inf").
MAPE = np.mean(np.abs(baseline_test - test)/np.abs(test))
# NOTE(review): np.diff on the 2-D `train` frame differences along the
# last axis (width 1), giving an empty array and hence MASE = inf —
# the intended scale is probably np.diff(train, axis=0); verify.
MASE = np.mean(np.abs(test - baseline_test ))/(np.abs(np.diff(train)).sum()/(len(train)-1))
print('MAE: %.3f' % MAE)
print('MAPE: %.3f' %MAPE)
print('MASE: %.3f' %MASE)
print('R^2 score: %.3f' % r2_score(test, baseline_test))
RMSE: 0.199 MAE: 0.121 MAPE: inf MASE: inf R^2 score: -0.487
import math
def Matrix(Y_test, Y_pred):
    """Compute a suite of regression / forecast-skill metrics.

    Parameters
    ----------
    Y_test : array-like of observed values.
    Y_pred : array-like of predicted values, same length.

    Returns
    -------
    tuple ``(mse, rmse, mae, mbe, r, R2, RRMSE, NSE, WI)``:
        mse / rmse / mae : mean squared / root-mean-squared / mean absolute error
        mbe   : mean bias error (observed - predicted)
        r, R2 : Pearson correlation and its square
        RRMSE : rmse as a percentage of the observed mean
        NSE   : Nash-Sutcliffe efficiency
        WI    : Willmott-style index of agreement

    Vectorised with NumPy — the original iterated element-by-element in
    pure Python with several redundant O(n) passes and required a pandas
    Series; results are numerically identical and any array-like is now
    accepted. A zero-variance input still raises ZeroDivisionError in the
    correlation, matching the original behaviour.
    """
    y_obs = np.asarray(Y_test, dtype=float)
    y_est = np.asarray(Y_pred, dtype=float)
    err = y_obs - y_est

    mse = float(np.mean(err ** 2))
    rmse = math.sqrt(mse)
    mae = float(np.mean(np.abs(err)))
    mbe = float(np.mean(err))

    m_obs = float(np.mean(y_obs))
    dev_obs = y_obs - m_obs
    dev_est = y_est - float(np.mean(y_est))

    # Pearson correlation coefficient.
    up = float(np.sum(dev_obs * dev_est))
    down = math.sqrt(float(np.sum(dev_obs ** 2)) * float(np.sum(dev_est ** 2)))
    r = up / down
    R2 = r * r

    RRMSE = (rmse * 100) / m_obs
    # Nash-Sutcliffe efficiency: 1 - SSE / variance of observations.
    NSE = 1 - float(np.sum(err ** 2)) / float(np.sum(dev_obs ** 2))
    # Willmott-style agreement index on absolute deviations.
    WI = 1 - float(np.sum(np.abs(err))) / (2 * float(np.sum(np.abs(dev_obs))))
    return mse, rmse, mae, mbe, r, R2, RRMSE, NSE, WI
baseline_test
| Value | |
|---|---|
| Date | |
| 1985-09-01 | 0.001923 |
| 1985-10-01 | 0.003846 |
| 1985-11-01 | 0.071154 |
| 1985-12-01 | 0.580769 |
| 1986-01-01 | 0.407692 |
| ... | ... |
| 2021-08-01 | 0.001923 |
| 2021-09-01 | 0.001923 |
| 2021-10-01 | 0.005769 |
| 2021-11-01 | 0.046154 |
| 2021-12-01 | 0.273077 |
436 rows × 1 columns
baseline=pd.DataFrame(test)
baseline["baseline"] = baseline_test
test_baseline=test.copy()
test_baseline
| Value | baseline | |
|---|---|---|
| Date | ||
| 1985-09-01 | 0.003846 | 0.001923 |
| 1985-10-01 | 0.071154 | 0.003846 |
| 1985-11-01 | 0.580769 | 0.071154 |
| 1985-12-01 | 0.407692 | 0.580769 |
| 1986-01-01 | 0.592308 | 0.407692 |
| ... | ... | ... |
| 2021-08-01 | 0.001923 | 0.001923 |
| 2021-09-01 | 0.005769 | 0.001923 |
| 2021-10-01 | 0.046154 | 0.005769 |
| 2021-11-01 | 0.273077 | 0.046154 |
| 2021-12-01 | 0.476923 | 0.273077 |
436 rows × 2 columns
new_row = pd.DataFrame([Matrix(test.Value ,test["baseline"] )])
Z=["mse", "rmse", "mae", "mbe", "r", "R2", "RRMSE", "NSE", "WI"]
new_row.columns =Z
result = new_row
result
| mse | rmse | mae | mbe | r | R2 | RRMSE | NSE | WI | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.039432 | 0.198574 | 0.120505 | 0.001089 | 0.252725 | 0.06387 | 170.707793 | -0.48701 | 0.513604 |
rollingMEAN = data_daily.rolling(window=10).mean()
rollingSTD = data_daily.rolling(window=10).std()
fig, (ax1, ax2) = plt.subplots(2,1)
plt.subplots_adjust(hspace=0.4)
ax1.plot(data_daily, c='blue',label='Data ')
ax1.plot(rollingMEAN, c='red', label='Rolling mean')
ax2.plot(rollingSTD, c='black',label = 'Rolling Std')
ax1.legend(fontsize=12), ax2.legend(fontsize=12)
ax1.set_ylabel('kW'), ax2.set_ylabel('kW')
ax1.margins(x=0), ax2.margins(x=0)
ax1.grid(), ax2.grid()
(None, None)
size = int(len(data_daily)*0.7)
train = data_daily[:size]
test = data_daily[size:]
test
| Value | |
|---|---|
| Date | |
| 1985-09-01 | 0.003846 |
| 1985-10-01 | 0.071154 |
| 1985-11-01 | 0.580769 |
| 1985-12-01 | 0.407692 |
| 1986-01-01 | 0.592308 |
| ... | ... |
| 2021-08-01 | 0.001923 |
| 2021-09-01 | 0.005769 |
| 2021-10-01 | 0.046154 |
| 2021-11-01 | 0.273077 |
| 2021-12-01 | 0.476923 |
436 rows × 1 columns
# Walk-forward (rolling one-step) ARIMA evaluation over the 30% test split:
# refit on a sliding window, forecast n steps, then advance the window.
n = 1
X = data_daily.values
size = int(len(X) * 0.7)
train, test = X[0:size], X[size:len(X)]
predictions = list()
confidence = list()
history = [x for x in train]
for t in range(0,len(test),n):
    # Refit ARIMA(2,0,1) on the current window and forecast n steps ahead
    # with a 95% confidence interval.
    model = ARIMA(history, order=(2,0,1))
    model_fit = model.fit()
    output = model_fit.forecast(n).tolist()
    conf = model_fit.get_forecast(n).conf_int(0.05)
    predictions.extend(output)
    confidence.extend(conf)
    obs = test.tolist()[t:t+n]
    # Sliding window: drop the oldest n observations and append the newly
    # observed ones, keeping the training-window length constant.
    history = history[n:]
    history.extend(obs);
conf_int = np.vstack(confidence)
# m > 0 only when len(test) is not a multiple of n (forecast overrun);
# with n = 1, m is 0 and the index union below is a no-op.
m = len(predictions) - len(test)
index_extended = data_daily[size:].index.union(data_daily[size:].index.shift((m))[-(m):])
predictions_series = pd.Series(predictions, index=index_extended)
confidence = pd.DataFrame(conf_int, columns=['lower', 'upper'])
# Assemble observed vs. predicted into one frame, re-dated from Data.
test = pd.DataFrame(test)
test["ARIMA"] = predictions
test.columns=["test" , "ARIMA"]
test.index = Data[size:len(X)].index
test
| test | ARIMA | |
|---|---|---|
| Date | ||
| 1985-09-01 | 0.003846 | 0.100064 |
| 1985-10-01 | 0.071154 | 0.116146 |
| 1985-11-01 | 0.580769 | 0.156372 |
| 1985-12-01 | 0.407692 | 0.364222 |
| 1986-01-01 | 0.592308 | 0.205666 |
| ... | ... | ... |
| 2021-08-01 | 0.001923 | 0.041139 |
| 2021-09-01 | 0.005769 | 0.107671 |
| 2021-10-01 | 0.046154 | 0.042625 |
| 2021-11-01 | 0.273077 | 0.122090 |
| 2021-12-01 | 0.476923 | 0.136350 |
436 rows × 2 columns
Matrix(test["test"] ,test["ARIMA"] )
(0.02424989839764312, 0.15572378879812526, 0.10868618039698635, -0.0008996842220513843, 0.3139685042298476, 0.09857622164832784, 133.87061538812753, 0.08551287140995589, 0.5613105584591628)
new_row = pd.DataFrame([Matrix(test["test"] ,test["ARIMA"] )])
new_row.columns =Z
new_row
| mse | rmse | mae | mbe | r | R2 | RRMSE | NSE | WI | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.02425 | 0.155724 | 0.108686 | -0.0009 | 0.313969 | 0.098576 | 133.870615 | 0.085513 | 0.561311 |
result_df = pd.concat([result, new_row], axis=0 , keys=['baseline' , 'ARIMA'] )
result_df
| mse | rmse | mae | mbe | r | R2 | RRMSE | NSE | WI | ||
|---|---|---|---|---|---|---|---|---|---|---|
| baseline | 0 | 0.039432 | 0.198574 | 0.120505 | 0.001089 | 0.252725 | 0.063870 | 170.707793 | -0.487010 | 0.513604 |
| ARIMA | 0 | 0.024250 | 0.155724 | 0.108686 | -0.000900 | 0.313969 | 0.098576 | 133.870615 | 0.085513 | 0.561311 |
ForeCating
| sarima_forecast | |
|---|---|
| 0 | 0.009516 |
| 1 | 0.078747 |
| 2 | 0.379815 |
| 3 | 0.262313 |
| 4 | 0.229006 |
| ... | ... |
| 431 | 0.018702 |
| 432 | 0.019970 |
| 433 | 0.085060 |
| 434 | 0.388992 |
| 435 | 0.270370 |
436 rows × 1 columns
LSTM_
| Test | LSTM | |
|---|---|---|
| 0 | 0.000000 | 0.070894 |
| 1 | 0.001923 | 0.038783 |
| 2 | 0.001923 | 0.004139 |
| 3 | 0.003846 | -0.015221 |
| 4 | 0.046154 | 0.025185 |
| ... | ... | ... |
| 566 | 0.001923 | -0.001025 |
| 567 | 0.005769 | 0.006319 |
| 568 | 0.046154 | 0.138052 |
| 569 | 0.273077 | 0.430540 |
| 570 | 0.476923 | 0.304305 |
571 rows × 2 columns
test_=test.reset_index(inplace=True)
ForeCating
| sarima_forecast | |
|---|---|
| 0 | 0.009516 |
| 1 | 0.078747 |
| 2 | 0.379815 |
| 3 | 0.262313 |
| 4 | 0.229006 |
| ... | ... |
| 431 | 0.018702 |
| 432 | 0.019970 |
| 433 | 0.085060 |
| 434 | 0.388992 |
| 435 | 0.270370 |
436 rows × 1 columns
test.drop(["Date"] , axis=1 , inplace=True)
test["SARIMA"]=ForeCating["sarima_forecast"]
pd.DataFrame(test)
| test | ARIMA | SARIMA | |
|---|---|---|---|
| 0 | 0.003846 | 0.100064 | 0.009516 |
| 1 | 0.071154 | 0.116146 | 0.078747 |
| 2 | 0.580769 | 0.156372 | 0.379815 |
| 3 | 0.407692 | 0.364222 | 0.262313 |
| 4 | 0.592308 | 0.205666 | 0.229006 |
| ... | ... | ... | ... |
| 431 | 0.001923 | 0.041139 | 0.018702 |
| 432 | 0.005769 | 0.107671 | 0.019970 |
| 433 | 0.046154 | 0.042625 | 0.085060 |
| 434 | 0.273077 | 0.122090 | 0.388992 |
| 435 | 0.476923 | 0.136350 | 0.270370 |
436 rows × 3 columns
LSTM_
| Test | LSTM | |
|---|---|---|
| 0 | 0.000000 | 0.070894 |
| 1 | 0.001923 | 0.038783 |
| 2 | 0.001923 | 0.004139 |
| 3 | 0.003846 | -0.015221 |
| 4 | 0.046154 | 0.025185 |
| ... | ... | ... |
| 566 | 0.001923 | -0.001025 |
| 567 | 0.005769 | 0.006319 |
| 568 | 0.046154 | 0.138052 |
| 569 | 0.273077 | 0.430540 |
| 570 | 0.476923 | 0.304305 |
571 rows × 2 columns
import math
def Matrix(Y_test, Y_pred):
    """Compute a suite of regression / forecast-skill metrics.

    Parameters
    ----------
    Y_test : array-like of observed values.
    Y_pred : array-like of predicted values, same length.

    Returns
    -------
    tuple ``(mse, rmse, mae, mbe, r, R2, RRMSE, NSE, WI)``:
        mse / rmse / mae : mean squared / root-mean-squared / mean absolute error
        mbe   : mean bias error (observed - predicted)
        r, R2 : Pearson correlation and its square
        RRMSE : rmse as a percentage of the observed mean
        NSE   : Nash-Sutcliffe efficiency
        WI    : Willmott-style index of agreement

    Vectorised with NumPy — the original iterated element-by-element in
    pure Python with several redundant O(n) passes and required a pandas
    Series; results are numerically identical and any array-like is now
    accepted. A zero-variance input still raises ZeroDivisionError in the
    correlation, matching the original behaviour.
    """
    y_obs = np.asarray(Y_test, dtype=float)
    y_est = np.asarray(Y_pred, dtype=float)
    err = y_obs - y_est

    mse = float(np.mean(err ** 2))
    rmse = math.sqrt(mse)
    mae = float(np.mean(np.abs(err)))
    mbe = float(np.mean(err))

    m_obs = float(np.mean(y_obs))
    dev_obs = y_obs - m_obs
    dev_est = y_est - float(np.mean(y_est))

    # Pearson correlation coefficient.
    up = float(np.sum(dev_obs * dev_est))
    down = math.sqrt(float(np.sum(dev_obs ** 2)) * float(np.sum(dev_est ** 2)))
    r = up / down
    R2 = r * r

    RRMSE = (rmse * 100) / m_obs
    # Nash-Sutcliffe efficiency: 1 - SSE / variance of observations.
    NSE = 1 - float(np.sum(err ** 2)) / float(np.sum(dev_obs ** 2))
    # Willmott-style agreement index on absolute deviations.
    WI = 1 - float(np.sum(np.abs(err))) / (2 * float(np.sum(np.abs(dev_obs))))
    return mse, rmse, mae, mbe, r, R2, RRMSE, NSE, WI
LSTM_
| Test | LSTM | |
|---|---|---|
| 0 | 0.000000 | 0.070894 |
| 1 | 0.001923 | 0.038783 |
| 2 | 0.001923 | 0.004139 |
| 3 | 0.003846 | -0.015221 |
| 4 | 0.046154 | 0.025185 |
| ... | ... | ... |
| 566 | 0.001923 | -0.001025 |
| 567 | 0.005769 | 0.006319 |
| 568 | 0.046154 | 0.138052 |
| 569 | 0.273077 | 0.430540 |
| 570 | 0.476923 | 0.304305 |
571 rows × 2 columns
LSTM = pd.DataFrame([Matrix(LSTM_["Test"] ,LSTM_["LSTM"] )])
LSTM.columns =Z
LSTM
| mse | rmse | mae | mbe | r | R2 | RRMSE | NSE | WI | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.013976 | 0.118221 | 0.074762 | -0.018328 | 0.673703 | 0.453875 | 104.077459 | 0.426391 | 0.684735 |
test["baseline"]=test_baseline["baseline"].values
test
| test | ARIMA | SARIMA | baseline | |
|---|---|---|---|---|
| 0 | 0.003846 | 0.100064 | 0.009516 | 0.001923 |
| 1 | 0.071154 | 0.116146 | 0.078747 | 0.003846 |
| 2 | 0.580769 | 0.156372 | 0.379815 | 0.071154 |
| 3 | 0.407692 | 0.364222 | 0.262313 | 0.580769 |
| 4 | 0.592308 | 0.205666 | 0.229006 | 0.407692 |
| ... | ... | ... | ... | ... |
| 431 | 0.001923 | 0.041139 | 0.018702 | 0.001923 |
| 432 | 0.005769 | 0.107671 | 0.019970 | 0.001923 |
| 433 | 0.046154 | 0.042625 | 0.085060 | 0.005769 |
| 434 | 0.273077 | 0.122090 | 0.388992 | 0.046154 |
| 435 | 0.476923 | 0.136350 | 0.270370 | 0.273077 |
436 rows × 4 columns
result_df = pd.concat([result, new_row , SARIMA , LSTM], axis=0 , keys=['baseline' , 'ARIMA' , "SARIMA" ,"LSTM"] )
result_df
| mse | rmse | mae | mbe | r | R2 | RRMSE | NSE | WI | ||
|---|---|---|---|---|---|---|---|---|---|---|
| baseline | 0 | 0.039432 | 0.198574 | 0.120505 | 0.001089 | 0.252725 | 0.063870 | 170.707793 | -0.487010 | 0.513604 |
| ARIMA | 0 | 0.024250 | 0.155724 | 0.108686 | -0.000900 | 0.313969 | 0.098576 | 133.870615 | 0.085513 | 0.561311 |
| SARIMA | 0 | 0.012407 | 0.111388 | 0.058482 | -0.002789 | 0.729863 | 0.532699 | 95.756353 | 0.532112 | 0.763949 |
| LSTM | 0 | 0.013976 | 0.118221 | 0.074762 | -0.018328 | 0.673703 | 0.453875 | 104.077459 | 0.426391 | 0.684735 |
result_df.index= ["baseline" , "ARIMA" , "SARIMA" , "LSTM"]
result_df
| mse | rmse | mae | mbe | r | R2 | RRMSE | NSE | WI | |
|---|---|---|---|---|---|---|---|---|---|
| baseline | 0.039432 | 0.198574 | 0.120505 | 0.001089 | 0.252725 | 0.063870 | 170.707793 | -0.487010 | 0.513604 |
| ARIMA | 0.024250 | 0.155724 | 0.108686 | -0.000900 | 0.313969 | 0.098576 | 133.870615 | 0.085513 | 0.561311 |
| SARIMA | 0.012407 | 0.111388 | 0.058482 | -0.002789 | 0.729863 | 0.532699 | 95.756353 | 0.532112 | 0.763949 |
| LSTM | 0.013976 | 0.118221 | 0.074762 | -0.018328 | 0.673703 | 0.453875 | 104.077459 | 0.426391 | 0.684735 |
result_df.rename_axis(index=['Models'] , inplace=True )
Prediction_ = test.copy()
Prediction_
| test | ARIMA | SARIMA | baseline | |
|---|---|---|---|---|
| 0 | 0.003846 | 0.100064 | 0.009516 | 0.001923 |
| 1 | 0.071154 | 0.116146 | 0.078747 | 0.003846 |
| 2 | 0.580769 | 0.156372 | 0.379815 | 0.071154 |
| 3 | 0.407692 | 0.364222 | 0.262313 | 0.580769 |
| 4 | 0.592308 | 0.205666 | 0.229006 | 0.407692 |
| ... | ... | ... | ... | ... |
| 431 | 0.001923 | 0.041139 | 0.018702 | 0.001923 |
| 432 | 0.005769 | 0.107671 | 0.019970 | 0.001923 |
| 433 | 0.046154 | 0.042625 | 0.085060 | 0.005769 |
| 434 | 0.273077 | 0.122090 | 0.388992 | 0.046154 |
| 435 | 0.476923 | 0.136350 | 0.270370 | 0.273077 |
436 rows × 4 columns
# Scatter of each model's predictions against the observed test values.
plt.figure()
for model in Prediction_.columns:
    if model != 'test':
        plt.scatter(Prediction_['test'], Prediction_[model], label=model)
plt.xlabel('Original Values')
plt.ylabel('Predicted Values')
plt.title('Scatter Plot of Model Predictions vs. Original Values')
plt.legend()
plt.grid()
# Bug fix: savefig must come after legend()/grid() — the original saved the
# figure before adding them, so the exported PNG had no legend or grid.
plt.savefig("Scatter Plot of Model Predictions vs. Original Values.png", dpi=400)
plt.show()
prediction_errors = Prediction_.drop(columns='test').subtract(Prediction_['test'], axis=0)
plt.figure()
prediction_errors.boxplot()
plt.xticks(rotation=45)
plt.ylabel('Prediction Errors')
plt.title('Box Plot of Prediction Errors for Each Model')
plt.grid()
plt.savefig("Box Plot of Prediction Errors for Each Model.png", dpi=400)
plt.show()
# Violin plot of per-model prediction errors (predicted - observed),
# reshaped to long format for seaborn.
plt.figure(figsize=(10, 6))
prediction_errors = Prediction_.drop(columns='test').subtract(Prediction_['test'], axis=0)
prediction_errors = prediction_errors.melt(var_name='Model', value_name='Error')
sns.violinplot(data=prediction_errors, x='Model', y='Error')
plt.xticks(rotation=45)
plt.ylabel('Prediction Errors')
plt.title('Violin Plot of Prediction Errors for Each Model')
plt.grid()
# Bug fix: grid() must be applied before savefig(), otherwise the exported
# PNG lacks the grid shown in the on-screen figure.
plt.savefig("Violin Plot of Prediction Errors for Each Model.png", dpi=400)
plt.show()
plt.figure(figsize=(25, 18))
for model in Prediction_.columns:
if model != 'test':
residuals = Prediction_[model] - Prediction_['test']
plt.plot(Prediction_.index, residuals, label=model)
plt.axhline(y=0, color='r', linestyle='--')
plt.xlabel('Data Points')
plt.ylabel('Residuals')
plt.title('Residual Plot of Model Predictions')
plt.legend()
plt.grid()
plt.savefig("Residual Plot of Model Predictions.png", dpi=400)
plt.show()
result = result_df
Features =list( result.columns)
Features
['mse', 'rmse', 'mae', 'mbe', 'r', 'R2', 'RRMSE', 'NSE', 'WI']
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import math
metrics_to_plot = Features
num_features = len(metrics_to_plot)
cols = math.ceil(math.sqrt(num_features))
rows = math.ceil(num_features / cols)
plt.figure(figsize=(12, 8))
for i, metric in enumerate(metrics_to_plot):
plt.subplot(rows, cols, i + 1)
stats.probplot(result[metric], dist="norm", plot=plt)
plt.title(f'Q-Q Plot for {metric.upper()}', fontweight='bold')
plt.grid()
plt.tight_layout(h_pad=0.5, w_pad=0.5)
plt.savefig("qq_plots.png", dpi=300)
plt.tight_layout()
plt.show()
plt.figure(figsize=(12, 8))
sns.heatmap(result[metrics_to_plot], annot=True, fmt=".2f", cmap="YlGnBu")
plt.xlabel('Metrics')
plt.ylabel('Models')
plt.title('Performance Metric Comparison (Heatmap)')
plt.savefig("Performance Metric Comparison (Heatmap).png", dpi=300)
plt.xticks(rotation=45)
plt.show()
from pandas.plotting import parallel_coordinates
plt.figure(figsize=(12, 8))
parallel_coordinates(result.reset_index(), 'Models', colormap='viridis')
plt.xlabel('Metrics')
plt.ylabel('Metric Values')
plt.title('Parallel Coordinates Plot of Model Comparison')
plt.xticks(rotation=45)
plt.grid(axis='y')
plt.savefig("Parallel Coordinates Plot of Model Comparison.png", dpi=400)
plt.show()
from math import pi

# Radar (spider) plot: min-max normalise every metric to [0, 1] so all axes
# share one scale, then draw a closed polygon per model.
normalized_metrics = (result[metrics_to_plot] - result[metrics_to_plot].min()) / (result[metrics_to_plot].max() - result[metrics_to_plot].min())
# The axis angles depend only on the number of metrics, so compute them once
# instead of rebuilding the list on every loop iteration as the original did.
angles = [n / float(len(metrics_to_plot)) * 2 * pi for n in range(len(metrics_to_plot))]
angles += angles[:1]
plt.figure(figsize=(10, 10))
for model in result.index:
    values = normalized_metrics.loc[model].tolist()
    values += values[:1]  # repeat the first point to close the polygon
    plt.polar(angles, values, label=model)
plt.title('Radar Plot of Performance Metrics')
plt.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))
plt.savefig("Radar Plot of Performance Metrics.png", dpi=400)
plt.show()
# Second handle on the forecast table; inspect its columns.
predictions_df = Prediction_
Prediction_.columns
# Output: Index(['test', 'ARIMA', 'SARIMA', 'baseline'], dtype='object')
# Overlay the actual series with the baseline and ARIMA forecasts
# (same draw order as before, so line colours are unchanged).
for label, column in (('Actual', 'test'), ('Baseline', 'baseline'), ('ARIMA', 'ARIMA')):
    plt.plot(predictions_df.index, predictions_df[column], label=label, marker='o')
plt.title('Predictions vs Actual Values')
plt.xlabel('DateTime')
plt.ylabel('Values')
plt.legend()
plt.show()
# Plot every model's forecast, then the actual series once as a dashed black
# reference. The original loop also drew the 'test' column, duplicating the
# actual series on the figure and in the legend — skip it here.
for model in Prediction_.columns:
    if model == 'test':
        continue
    plt.plot(predictions_df.index, predictions_df[model], label=model, marker='o')
plt.plot(predictions_df.index, predictions_df['test'], label='Actual', linestyle='--', color='black', marker='o')
plt.title('Predictions vs Actual Values')
plt.xlabel('DateTime')
plt.ylabel('Values')
plt.legend()
plt.show()
# One residual figure per column: actual minus prediction over the index,
# with a dashed zero reference line. (Note: the 'test' column yields an
# all-zero trace since it is compared against itself.)
for name in Prediction_.columns:
    diff = predictions_df['test'] - predictions_df[name]
    plt.figure(figsize=(12, 6))
    plt.plot(predictions_df.index, diff, marker='o', label=f'{name} Residuals')
    plt.axhline(y=0, color='red', linestyle='--', label='Zero Residuals')
    plt.title(f'{name} Residuals Analysis')
    plt.xlabel('DateTime')
    plt.ylabel('Residuals')
    plt.legend()
    plt.show()
# Histogram + KDE of residuals per model, overlaid on one figure.
# BUG FIX: the original read "for model in :" — a syntax error with the
# iterable missing. Restore Prediction_.columns, matching the sibling
# residual loops elsewhere in this script.
plt.figure(figsize=(12, 6))
for model in Prediction_.columns:
    residuals = predictions_df['test'] - predictions_df[model]
    sns.histplot(residuals, kde=True, label=f'{model} Residuals', bins=30)
plt.title('Distribution of Residuals for Each Model')
plt.xlabel('Residuals')
plt.ylabel('Frequency')
plt.legend()
plt.show()
# Empirical CDF of each model's residuals, overlaid for comparison.
plt.figure(figsize=(12, 6))
for name in Prediction_.columns:
    diff = predictions_df['test'] - predictions_df[name]
    sns.ecdfplot(diff, label=f'{name} Residuals')
plt.title('Cumulative Distribution Function of Residuals for Each Model')
plt.xlabel('Residuals')
plt.ylabel('Cumulative Probability')
plt.legend()
plt.show()
# Keep the column index around for the autocorrelation / scatter plots below.
Col = Prediction_.columns
Col
# Output: Index(['test', 'ARIMA', 'SARIMA', 'baseline'], dtype='object')
from statsmodels.graphics.tsaplots import plot_acf

# Autocorrelation of each model's residuals on ONE shared Axes.
# BUG FIX: plot_acf() creates its own new figure when no `ax` is supplied,
# which left the original plt.figure(figsize=(12, 6)) empty (the stray
# "<Figure size 1200x600 with 0 Axes>" output) and attached the title,
# labels and legend to the wrong figure. Passing ax= fixes both problems.
fig, ax = plt.subplots(figsize=(12, 6))
for model in Col:
    residuals = predictions_df['test'] - predictions_df[model]
    plot_acf(residuals, lags=5, label=f'{model} Residuals', alpha=0.05, ax=ax)
plt.title('Autocorrelation of Residuals for Each Model')
plt.xlabel('Lags')
plt.ylabel('Autocorrelation')
plt.legend()
plt.show()
# Output: <Figure size 1200x600 with 0 Axes>
# Scatter of predicted value against residual, one colour per column,
# with a dashed zero reference line.
plt.figure(figsize=(12, 8))
for name in Col:
    diff = predictions_df['test'] - predictions_df[name]
    sns.scatterplot(x=predictions_df[name], y=diff, alpha=0.7, label=f'{name} Residuals')
plt.axhline(y=0, color='red', linestyle='--', label='Zero Residuals')
plt.title('Scatter Plot of Predictions vs Residuals')
plt.xlabel('Predictions')
plt.ylabel('Residuals')
plt.legend()
plt.show()
# Persist predictions and metrics. Create the output directory first so the
# to_csv calls cannot fail with FileNotFoundError on a fresh checkout
# (`os` is already imported at the top of the file).
os.makedirs("Data_ml", exist_ok=True)
LSTM_.to_csv("Data_ml/LSTMPrediction.csv", index=False)
result_df.to_csv("Data_ml/Results.csv", index=False)
Prediction_.to_csv("Data_ml/Forecasting.csv", index=False)